{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AwhxwHTf4VZp"
   },
   "source": [
    "## Evaluating RAG with RAGAs using GPT-4o\n",
    "\n",
    "Ragas is a **framework for evaluating Retrieval Augmented Generation (RAG) pipelines**.\n",
    "\n",
    "Ragas provides you with the tools/metrics based on the latest research for evaluating LLM-generated text to give you insights about your RAG pipeline. Ragas can be integrated with your CI/CD to provide continuous checks to ensure performance.\n",
    "\n",
    "GPT4-o is used as an LLM to generate responses out of semantically close context chunks.\n",
    "\n",
    "![flow.png]()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "1mr4fXemsYci",
    "outputId": "d45c8226-3f4a-4bd6-cb4b-62e7e83d4023"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.4/27.4 MB\u001b[0m \u001b[31m40.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m30.5/30.5 MB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.5/157.5 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m66.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m409.5/409.5 kB\u001b[0m \u001b[31m24.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m66.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.6/50.6 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install langchain openai lancedb ragas -q"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "z8hT0Jn74ZmT"
   },
   "source": [
    "### Setup `OPENAI_API_KEY` as an environment variable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YHgQd_1rI04R"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-proj-....\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "mM0tf_vo6GbI"
   },
   "source": [
    "### Load .txt file and convert them into chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "IkLbg-_1I3Rt",
    "outputId": "38f3beb7-b4bd-423e-b77f-f2c79f16990e"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:langchain_text_splitters.base:Created a chunk of size 215, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 232, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 242, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 219, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 304, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 205, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 332, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 215, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 203, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 281, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 201, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 250, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 325, which is longer than the specified 200\n",
      "WARNING:langchain_text_splitters.base:Created a chunk of size 242, which is longer than the specified 200\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from langchain.document_loaders import TextLoader\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "\n",
    "url = \"https://raw.githubusercontent.com/hwchase17/chroma-langchain/master/state_of_the_union.txt\"\n",
    "res = requests.get(url)\n",
    "with open(\"state_of_the_union.txt\", \"w\") as f:\n",
    "    f.write(res.text)\n",
    "\n",
    "# Load the data\n",
    "loader = TextLoader(\"./state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "\n",
    "# Chunk the data\n",
    "text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)\n",
    "chunks = text_splitter.split_documents(documents)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pgetSLZXEJ2Q"
   },
   "source": [
    "### Setup Retriever\n",
    "\n",
    "Retriever utilizes **LanceDB** for scalable vector search and advanced retrieval in RAG, delivering blazing fast performance for searching large sets of embeddings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "2PYhU_vvJC0P"
   },
   "outputs": [],
   "source": [
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.vectorstores import LanceDB\n",
    "import lancedb\n",
    "\n",
    "openai_embed = OpenAIEmbeddings()\n",
    "\n",
    "# Setup lancedb\n",
    "db = lancedb.connect(\"/tmp/lancedb\")\n",
    "table = db.create_table(\n",
    "    \"raga_eval\",\n",
    "    data=[{\"vector\": openai_embed.embed_query(\"Hello World\"), \"text\": \"Hello World\"}],\n",
    "    mode=\"overwrite\",\n",
    ")\n",
    "\n",
    "# Populate vector database\n",
    "vectorstore = LanceDB.from_documents(\n",
    "    connection=db, documents=chunks, embedding=openai_embed\n",
    ")\n",
    "\n",
    "# Define vectorstore as retriever to enable semantic search\n",
    "retriever = vectorstore.as_retriever()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9CFnNEfuExj7"
   },
   "source": [
    "### Setup RAG Pipeline with Prompt template"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "-TiQhbNyLSKv",
    "outputId": "7ab35c53-74f5-471d-e51b-bc1068873d12"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-9-fe6c5e5e17b4>:7: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import ChatOpenAI``.\n",
      "  llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)\n"
     ]
    }
   ],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain.schema.runnable import RunnablePassthrough\n",
    "from langchain.schema.output_parser import StrOutputParser\n",
    "\n",
    "# Define LLM\n",
    "llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)\n",
    "\n",
    "# Define Prompt template\n",
    "template = \"\"\"You are an assistant for question-answering tasks.\n",
    "Use the following pieces of retrieved context to answer the question.\n",
    "If you don't know the answer, just say that you don't know.\n",
    "Use two sentences maximum and keep the answer concise.\n",
    "Question: {question}\n",
    "Context: {context}\n",
    "Answer:\n",
    "\"\"\"\n",
    "\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "\n",
    "# Setup RAG pipeline\n",
    "rag_chain = (\n",
    "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Ge8JtkNXFXhI"
   },
   "source": [
    "### Sample Questions with their Expected Answers\n",
    "\n",
    "Define a set of questions with their answers for creating dataset including ground truth, generated answers with their context using which they are generated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "PGiU57QJMP0J",
    "outputId": "02907160-556f-4c52-8246-ce8983ad0d7d"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-10-d89a36ccef76>:20: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 1.0. Use :meth:`~invoke` instead.\n",
      "  [docs.page_content for docs in retriever.get_relevant_documents(query)]\n"
     ]
    }
   ],
   "source": [
    "from datasets import Dataset\n",
    "\n",
    "questions = [\n",
    "    \"What did the president say about Justice Breyer?\",\n",
    "    \"What did the president say about Intel's CEO?\",\n",
    "    \"What did the president say about gun violence?\",\n",
    "]\n",
    "ground_truth = [\n",
    "    \"The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.\",\n",
    "    \"The president said that Pat Gelsinger is ready to increase Intel's investment to $100 billion.\",\n",
    "    \"The president asked Congress to pass proven measures to reduce gun violence.\",\n",
    "]\n",
    "answers = []\n",
    "contexts = []\n",
    "\n",
    "# Inference\n",
    "for query in questions:\n",
    "    answers.append(rag_chain.invoke(query))\n",
    "    contexts.append(\n",
    "        [docs.page_content for docs in retriever.get_relevant_documents(query)]\n",
    "    )\n",
    "\n",
    "# To dict\n",
    "data = {\n",
    "    \"question\": questions,\n",
    "    \"answer\": answers,\n",
    "    \"contexts\": contexts,\n",
    "    \"ground_truth\": ground_truth,\n",
    "}\n",
    "\n",
    "# Convert dict to dataset\n",
    "dataset = Dataset.from_dict(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "szBZ1nwkFruF"
   },
   "source": [
    "### RAGA Evaluation Pipeline\n",
    "\n",
    "Simple pipeline of RAGA for evaluation with the listed metrics to understand and evaluate the RAG system.\n",
    "\n",
    "**Metrics** on which we will evaulate are answer_correctness,\n",
    "faithfulness,\n",
    "answer_similarity,\n",
    "context_precision,\n",
    "context_utilization,\n",
    "context_recall,\n",
    "context_relevancy,\n",
    "answer_relevancy, and\n",
    "context_entity_recall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 49,
     "referenced_widgets": [
      "c47a266b53544dc082e9c5279d8cfd9f",
      "cf05971b750c4504acd2267f2ef8bd49",
      "cf8c29dd27b34058bc7a408fac30f4f4",
      "9c03315f38924551a5e367dad53ea1ca",
      "58fdb24797da4e8487b52d26dd84e836",
      "02560cc7f0104df496b1e2f4ce917c6f",
      "5106c77bdaa946cfa9585f88b26cef6c",
      "3ef897bc6f8a4f32b4b81438f9796106",
      "7597383882584bd29c08e3371c238b3c",
      "9ae18f1d919949c8935a9b8f270e93c3",
      "6f5710bdbc0c4bfbbc5e684745402e47"
     ]
    },
    "id": "Samkm2TnMUQA",
    "outputId": "795b3f93-4aac-4fbb-de64-2b06480cc57a"
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c47a266b53544dc082e9c5279d8cfd9f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/21 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from ragas import evaluate\n",
    "from ragas.metrics import (\n",
    "    answer_correctness,\n",
    "    faithfulness,\n",
    "    answer_similarity,\n",
    "    context_precision,\n",
    "    context_recall,\n",
    "    answer_relevancy,\n",
    "    context_entity_recall,\n",
    ")\n",
    "\n",
    "\n",
    "# evaluating dataest on listed metrics\n",
    "result = evaluate(\n",
    "    dataset=dataset,\n",
    "    metrics=[\n",
    "        answer_correctness,\n",
    "        faithfulness,\n",
    "        answer_similarity,\n",
    "        context_precision,\n",
    "        context_recall,\n",
    "        answer_relevancy,\n",
    "        context_entity_recall,\n",
    "    ],\n",
    ")\n",
    "\n",
    "\n",
    "df = result.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 441
    },
    "id": "77QVRXtVMbCG",
    "outputId": "3fd675f0-6511-486c-f919-08155c08d06c"
   },
   "outputs": [
    {
     "data": {
      "application/vnd.google.colaboratory.intrinsic+json": {
       "summary": "{\n  \"name\": \"df\",\n  \"rows\": 3,\n  \"fields\": [\n    {\n      \"column\": \"user_input\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"What did the president say about Justice Breyer?\",\n          \"What did the president say about Intel's CEO?\",\n          \"What did the president say about gun violence?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"retrieved_contexts\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"response\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"The president honored Justice Stephen Breyer for his dedication to serving the country, acknowledging him as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.\",\n          \"The president said that Intel\\u2019s CEO, Pat Gelsinger, told him they are ready to increase their investment from $20 billion to $100 billion.\",\n          \"The president called for Congress to pass proven measures to reduce gun violence, including universal background checks, banning assault weapons and high-capacity magazines, and repealing the liability shield for gun manufacturers.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"reference\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.\",\n          \"The president said that Pat Gelsinger is ready to increase Intel's investment to $100 billion.\",\n          \"The president asked Congress to pass proven measures to reduce gun violence.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer_correctness\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.23687834777938732,\n        \"min\": 0.5359520548823975,\n        \"max\": 0.9950187529685631,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          0.6641079904090759,\n          0.9950187529685631,\n          0.5359520548823975\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"faithfulness\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.28867513459481287,\n        \"min\": 0.5,\n        \"max\": 1.0,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.5,\n          1.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"semantic_similarity\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.02143452649654375,\n        \"min\": 0.9421462473505888,\n        \"max\": 0.9800750118742526,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          0.9421462473505888,\n          0.9800750118742526\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"context_precision\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.28867513456594535,\n        \"min\": 0.49999999995,\n        \"max\": 0.9999999999,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0.9999999999,\n          0.49999999995\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"context_recall\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.0,\n        \"min\": 1.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          1.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer_relevancy\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.01817965674116965,\n        \"min\": 0.9284069541601007,\n        \"max\": 0.9644181895157772,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          0.9644181895157772\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"context_entity_recall\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.2204792754233461,\n        \"min\": 0.3333333322222222,\n        \"max\": 0.7499999981250001,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          0.3333333322222222\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
       "type": "dataframe",
       "variable_name": "df"
      },
      "text/html": [
       "\n",
       "  <div id=\"df-37d8e33e-582b-4282-ac2f-701bae8a273e\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_input</th>\n",
       "      <th>retrieved_contexts</th>\n",
       "      <th>response</th>\n",
       "      <th>reference</th>\n",
       "      <th>answer_correctness</th>\n",
       "      <th>faithfulness</th>\n",
       "      <th>semantic_similarity</th>\n",
       "      <th>context_precision</th>\n",
       "      <th>context_recall</th>\n",
       "      <th>answer_relevancy</th>\n",
       "      <th>context_entity_recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>What did the president say about Justice Breyer?</td>\n",
       "      <td>[And I did that 4 days ago, when I nominated C...</td>\n",
       "      <td>The president honored Justice Stephen Breyer f...</td>\n",
       "      <td>The president said that Justice Breyer has ded...</td>\n",
       "      <td>0.664108</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.942146</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.964418</td>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>What did the president say about Intel's CEO?</td>\n",
       "      <td>[Intel’s CEO, Pat Gelsinger, who is here tonig...</td>\n",
       "      <td>The president said that Intel’s CEO, Pat Gelsi...</td>\n",
       "      <td>The president said that Pat Gelsinger is ready...</td>\n",
       "      <td>0.995019</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.980075</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.950759</td>\n",
       "      <td>0.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>What did the president say about gun violence?</td>\n",
       "      <td>[And I ask Congress to pass proven measures to...</td>\n",
       "      <td>The president called for Congress to pass prov...</td>\n",
       "      <td>The president asked Congress to pass proven me...</td>\n",
       "      <td>0.535952</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.943808</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.928407</td>\n",
       "      <td>0.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-37d8e33e-582b-4282-ac2f-701bae8a273e')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-37d8e33e-582b-4282-ac2f-701bae8a273e button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-37d8e33e-582b-4282-ac2f-701bae8a273e');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "<div id=\"df-df691c9c-74ef-4b8c-befc-861a5ed78e99\">\n",
       "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-df691c9c-74ef-4b8c-befc-861a5ed78e99')\"\n",
       "            title=\"Suggest charts\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "  </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "  <script>\n",
       "    async function quickchart(key) {\n",
       "      const quickchartButtonEl =\n",
       "        document.querySelector('#' + key + ' button');\n",
       "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "      try {\n",
       "        const charts = await google.colab.kernel.invokeFunction(\n",
       "            'suggestCharts', [key], {});\n",
       "      } catch (error) {\n",
       "        console.error('Error during call to suggestCharts:', error);\n",
       "      }\n",
       "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "    }\n",
       "    (() => {\n",
       "      let quickchartButtonEl =\n",
       "        document.querySelector('#df-df691c9c-74ef-4b8c-befc-861a5ed78e99 button');\n",
       "      quickchartButtonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "    })();\n",
       "  </script>\n",
       "</div>\n",
       "\n",
       "  <div id=\"id_28adaf1d-0fcf-45ab-9b84-cb772f4995d1\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_28adaf1d-0fcf-45ab-9b84-cb772f4995d1 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('df');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "text/plain": [
       "                                         user_input  \\\n",
       "0  What did the president say about Justice Breyer?   \n",
       "1     What did the president say about Intel's CEO?   \n",
       "2    What did the president say about gun violence?   \n",
       "\n",
       "                                  retrieved_contexts  \\\n",
       "0  [And I did that 4 days ago, when I nominated C...   \n",
       "1  [Intel’s CEO, Pat Gelsinger, who is here tonig...   \n",
       "2  [And I ask Congress to pass proven measures to...   \n",
       "\n",
       "                                            response  \\\n",
       "0  The president honored Justice Stephen Breyer f...   \n",
       "1  The president said that Intel’s CEO, Pat Gelsi...   \n",
       "2  The president called for Congress to pass prov...   \n",
       "\n",
       "                                           reference  answer_correctness  \\\n",
       "0  The president said that Justice Breyer has ded...            0.664108   \n",
       "1  The president said that Pat Gelsinger is ready...            0.995019   \n",
       "2  The president asked Congress to pass proven me...            0.535952   \n",
       "\n",
       "   faithfulness  semantic_similarity  context_precision  context_recall  \\\n",
       "0           1.0             0.942146                0.5             1.0   \n",
       "1           0.5             0.980075                1.0             1.0   \n",
       "2           1.0             0.943808                1.0             1.0   \n",
       "\n",
       "   answer_relevancy  context_entity_recall  \n",
       "0          0.964418               0.333333  \n",
       "1          0.950759               0.750000  \n",
       "2          0.928407               0.666667  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# evaluation metrics\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
